1
2
3
4
5
6
7
8
9
10
11 """Restriction Enzyme classes.
12
13 Notes about the diverse classes of the restriction enzyme implementation::
14
15 RestrictionType is the type of all restriction enzymes.
16 ----------------------------------------------------------------------------
17 AbstractCut implements some methods that are common to all enzymes.
18 ----------------------------------------------------------------------------
19 NoCut, OneCut,TwoCuts represent the number of double strand cuts
20 produced by the enzyme.
21 they correspond to the 4th field of the
22 rebase record emboss_e.NNN.
23 0->NoCut : the enzyme is not characterised.
24 2->OneCut : the enzyme produces one double strand cut.
25 4->TwoCuts : two double strand cuts.
26 ----------------------------------------------------------------------------
27 Meth_Dep, Meth_Undep represent the methylation susceptibility to
28 the enzyme.
29 Not implemented yet.
30 ----------------------------------------------------------------------------
31 Palindromic, if the site is palindromic or not.
32 NotPalindromic allow some optimisations of the code.
33 No need to check the reverse strand
34 with palindromic sites.
35 ----------------------------------------------------------------------------
36 Unknown, Blunt, represent the overhang.
37 Ov5, Ov3 Unknown is here for symmetry reasons and
38 correspond to enzymes that are not
39 characterised in rebase.
40 ----------------------------------------------------------------------------
41 Defined, Ambiguous, represent the sequence of the overhang.
42 NotDefined
43 NotDefined is for enzymes not characterised
44 in rebase.
45
46 Defined correspond to enzymes that display
47 a constant overhang whatever the sequence.
48 ex : EcoRI. G^AATTC -> overhang :AATT
49 CTTAA^G
50
51 Ambiguous : the overhang varies with the
52 sequence restricted.
53 Typically enzymes which cut outside their
54 restriction site or (but not always)
55 inside an ambiguous site.
56 ex:
57 AcuI CTGAAG(22/20) -> overhang : NN
58 AasI GACNNN^NNNGTC -> overhang : NN
59 CTGN^NNNNNCAG
60
61 note : these 3 classes refer to the overhang, not the site.
62 So the enzyme ApoI (RAATTY) is defined even if its
63 restriction site is ambiguous.
64
65 ApoI R^AATTY -> overhang : AATT -> Defined
66 YTTAA^R
67 Accordingly, blunt enzymes are always Defined even
68 when they cut outside their restriction site.
69 ----------------------------------------------------------------------------
70 Not_available, as found in rebase file emboss_r.NNN files.
71 Commercially_available
72 allow the selection of the enzymes
73 according to their suppliers to reduce the
74 quantity of results.
75 Also will allow the implementation of
76 buffer compatibility tables. Not
77 implemented yet.
78
79 the list of suppliers is extracted from
80 emboss_s.NNN
81 ----------------------------------------------------------------------------
82
83 """
84
85 from __future__ import print_function
86
87 import warnings
88
89 from Bio._py3k import zip
90 from Bio._py3k import filter
91 from Bio._py3k import range
92
93 import re
94 import itertools
95
96 from Bio.Seq import Seq, MutableSeq
97 from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict
98 from Bio.Restriction.Restriction_Dictionary import typedict
99 from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict
100 from Bio.Restriction.PrintFormat import PrintFormat
101 from Bio import BiopythonWarning
109 """Check characters in a string (PRIVATE).
110
111 Remove digits and white space present in string. Allows any valid ambiguous
112 IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, lower case are converted).
113
114 Other characters (e.g. symbols) trigger a TypeError.
115
116 Returns the string WITH A LEADING SPACE (!). This is for backwards
117 compatibility, and may in part be explained by the fact that
118 Bio.Restriction doesn't use zero based counting.
119 """
120
121 seq_string = "".join(seq_string.split()).upper()
122
123 for c in "0123456789":
124 seq_string = seq_string.replace(c, "")
125
126 if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")):
127 raise TypeError("Invalid character found in %s" % repr(seq_string))
128 return " " + seq_string
129
130
# Compatibility table for IUPAC nucleotide codes: each key maps to the string
# of every code whose set of possible bases overlaps that of the key
# (e.g. 'A' pairs with R = A/G, W = A/T, ... and N = any base).
matching = {
    'A': 'ARWMHVDN',
    'C': 'CYSMHBVN',
    'G': 'GRSKBVDN',
    'T': 'TYWKHBDN',
    'R': 'ABDGHKMNSRWV',
    'Y': 'CBDHKMNSTWVY',
    'W': 'ABDHKMNRTWVY',
    'S': 'CBDGHKMNSRVY',
    'M': 'ACBDHMNSRWVY',
    'K': 'BDGHKNSRTWVY',
    'H': 'ACBDHKMNSRTWVY',
    'B': 'CBDGHKMNSRTWVY',
    'V': 'ACBDGHKMNSRWVY',
    'D': 'ABDGHKMNSRTWVY',
    'N': 'ACBDGHKMNSRTWVY',
}
137
138 DNA = Seq
238
241 """RestrictionType. Type from which all enzyme classes are derived.
242
243 Implement the operator methods.
244 """
245
def __init__(cls, name='', bases=(), dct=None):
    """Finish the set-up of a newly created enzyme class.

    The enzyme name is validated (a hyphen is not allowed in it) and the
    ``compsite`` class attribute, which holds a regular expression pattern,
    is replaced by its compiled form.

    Not intended to be used in normal operation: the enzyme classes are
    created when the module is imported.

    Raises ValueError if ``name`` contains a hyphen or if ``compsite``
    cannot be compiled.
    """
    has_hyphen = "-" in name
    if has_hyphen:
        message = "Problem with hyphen in %s as enzyme name" % repr(name)
        raise ValueError(message)

    try:
        compiled_site = re.compile(cls.compsite)
        cls.compsite = compiled_site
    except Exception:
        # Any failure while compiling is reported as a ValueError that shows
        # the offending pattern.
        raise ValueError("Problem with regular expression, re.compiled(%s)"
                         % repr(cls.compsite))
264
277
279 """Override '/' operator to use as search method.
280
281 >>> EcoRI/Seq('GAATTC')
282 [2]
283 Returns RE.search(other).
284 """
285 return cls.search(other)
286
288 """Override division with reversed operands to use as search method.
289
290 >>> Seq('GAATTC')/EcoRI
291 [2]
292 Returns RE.search(other).
293 """
294 return cls.search(other)
295
297 """Override Python 3 division operator to use as search method.
298
299 Like __div__.
300 """
301 return cls.search(other)
302
304 """As __truediv__, with reversed operands.
305
306 Like __rdiv__.
307 """
308 return cls.search(other)
309
311 """Override '//' operator to use as catalyse method.
312
313 >>> EcoRI//Seq('GAATTC')
314 (Seq('G', Alphabet()), Seq('AATTC', Alphabet()))
315 Returns RE.catalyse(other).
316 """
317 return cls.catalyse(other)
318
320 """As __floordiv__, with reversed operands.
321
322 >>> Seq('GAATTC')//EcoRI
323 (Seq('G', Alphabet()), Seq('AATTC', Alphabet()))
324 Returns RE.catalyse(other).
325 """
326 return cls.catalyse(other)
327
329 """Return the name of the enzyme as string."""
330 return cls.__name__
331
333 """Implement repr method.
334
335 Used with eval or exec will instantiate the enzyme.
336 """
337 return "%s" % cls.__name__
338
340 """Return length of recognition site of enzyme as int."""
341 return cls.size
342
344
345
346 return id(cls)
347
349 """Override '==' operator.
350
351 True if RE and other are the same enzyme.
352
353 Specifically this checks they are the same Python object.
354 """
355
356 return id(cls) == id(other)
357
359 """Override '!=' operator.
360
361 Isoschizomer strict (same recognition site, same restriction) -> False
362 All the other-> True
363
364 WARNING - This is not the inverse of the __eq__ method
365 >>> SacI != SstI # true isoschizomers
366 False
367 >>> SacI == SstI
368 False
369 """
370 if not isinstance(other, RestrictionType):
371 return True
372 elif cls.charac == other.charac:
373 return False
374 else:
375 return True
376
378 """Override '>>' operator to test for neoschizomers.
379
380 neoschizomer : same recognition site, different restriction. -> True
381 all the others : -> False
382 >>> SmaI >> XmaI
383 True
384 """
385 if not isinstance(other, RestrictionType):
386 return False
387 elif cls.site == other.site and cls.charac != other.charac:
388 return True
389 else:
390 return False
391
393 """Override '%' operator to test for compatible overhangs.
394
395 True if a and b have compatible overhang.
396 >>> XhoI % SalI
397 True
398 """
399 if not isinstance(other, RestrictionType):
400 raise TypeError(
401 'expected RestrictionType, got %s instead' % type(other))
402 return cls._mod1(other)
403
405 """Compare length of recognition site of two enzymes.
406
407 Override '>='. a is greater or equal than b if the a site is longer
408 than b site. If their site have the same length sort by alphabetical
409 order of their names.
410 >>> EcoRI.size
411 6
412 >>> EcoRV.size
413 6
414 >>> EcoRI >= EcoRV
415 False
416 """
417 if not isinstance(other, RestrictionType):
418 raise NotImplementedError
419 if len(cls) > len(other):
420 return True
421 elif cls.size == len(other) and cls.__name__ >= other.__name__:
422 return True
423 else:
424 return False
425
427 """Compare length of recognition site of two enzymes.
428
429 Override '>'. Sorting order:
430 1. size of the recognition site.
431 2. if equal size, alphabetical order of the names.
432
433 """
434 if not isinstance(other, RestrictionType):
435 raise NotImplementedError
436 if len(cls) > len(other):
437 return True
438 elif cls.size == len(other) and cls.__name__ > other.__name__:
439 return True
440 else:
441 return False
442
444 """Compare length of recognition site of two enzymes.
445
446 Override '<='. Sorting order:
447 1. size of the recognition site.
448 2. if equal size, alphabetical order of the names.
449
450 """
451 if not isinstance(other, RestrictionType):
452 raise NotImplementedError
453 elif len(cls) < len(other):
454 return True
455 elif len(cls) == len(other) and cls.__name__ <= other.__name__:
456 return True
457 else:
458 return False
459
461 """Compare length of recognition site of two enzymes.
462
463 Override '<'. Sorting order:
464 1. size of the recognition site.
465 2. if equal size, alphabetical order of the names.
466
467 """
468 if not isinstance(other, RestrictionType):
469 raise NotImplementedError
470 elif len(cls) < len(other):
471 return True
472 elif len(cls) == len(other) and cls.__name__ < other.__name__:
473 return True
474 else:
475 return False
476
479 """Implement the methods that are common to all restriction enzymes.
480
481 All the methods are classmethod.
482
483 For internal use only. Not meant to be instantiated.
484 """
485
@classmethod
def search(cls, dna, linear=True):
    """Return the list of positions where the enzyme cuts the sequence.

    dna must be a Bio.Seq.Seq or Bio.Seq.MutableSeq instance. A FormattedSeq
    is accepted too and is then used unchanged, in which case ``linear`` is
    not consulted.

    If linear is False, restriction sites spanning the sequence boundaries
    are included as well.

    Each position is the first base of the 3' fragment, i.e. the first base
    after the point where the enzyme cuts.

    The sequence searched is kept on the class as ``cls.dna``.
    """
    if not isinstance(dna, FormattedSeq):
        dna = FormattedSeq(dna, linear)
    cls.dna = dna
    return cls._search()
513
514 @classmethod
516 """Print all the suppliers of restriction enzyme."""
517 supply = sorted(x[0] for x in suppliers_dict.values())
518 print(",\n".join(supply))
519 return
520
521 @classmethod
523 """Test for real isoschizomer.
524
525 True if other is an isoschizomer of RE, but not a neoschizomer,
526 else False.
527
528 Equischizomer: same site, same position of restriction.
529 >>> SacI.is_equischizomer(SstI)
530 True
531 >>> SmaI.is_equischizomer(XmaI)
532 False
533
534 """
535 return not cls != other
536
537 @classmethod
539 """Test for neoschizomer.
540
541 True if other is a neoschizomer of RE, else False.
542 Neoschizomer: same site, different position of restriction.
543 """
544 return cls >> other
545
546 @classmethod
548 """Test for same recognition site.
549
550 True if other has the same recognition site, else False.
551
552 Isoschizomer: same site.
553 >>> SacI.is_isoschizomer(SstI)
554 True
555 >>> SmaI.is_isoschizomer(XmaI)
556 True
557
558 """
559 return (not cls != other) or cls >> other
560
561 @classmethod
563 """List equischizomers of the enzyme.
564
565 Return a sorted list of all the equischizomers of RE.
566 If batch is supplied it is used instead of the default AllEnzymes.
567
568 Equischizomer: same site, same position of restriction.
569 """
570 if not batch:
571 batch = AllEnzymes
572 r = [x for x in batch if not cls != x]
573 i = r.index(cls)
574 del r[i]
575 r.sort()
576 return r
577
578 @classmethod
580 """List neoschizomers of the enzyme.
581
582 Return a sorted list of all the neoschizomers of RE.
583 If batch is supplied it is used instead of the default AllEnzymes.
584
585 Neoschizomer: same site, different position of restriction.
586 """
587 if not batch:
588 batch = AllEnzymes
589 r = sorted(x for x in batch if cls >> x)
590 return r
591
592 @classmethod
594 """List all isoschizomers of the enzyme.
595
596 Return a sorted list of all the equischizomers and neoschizomers of RE.
597 If batch is supplied it is used instead of the default AllEnzymes.
598 """
599 if not batch:
600 batch = AllEnzymes
601 r = [x for x in batch if (cls >> x) or (not cls != x)]
602 i = r.index(cls)
603 del r[i]
604 r.sort()
605 return r
606
607 @classmethod
609 """Return the theoretical cutting frequency of the enzyme.
610
611 Frequency of the site, given as 'one cut per x bases' (int).
612 """
613 return cls.freq
614
615
class NoCut(AbstractCut):
    """Behaviour of enzymes for which no cut position is recorded.

    These are generally enzymes that are only partially characterised (the
    way they cut the DNA is unknown), or enzymes whose cutting pattern is
    too complex to be recorded in Rebase (ncuts value of 0 in emboss_e.###).

    With these enzymes search() reports the start of each restriction site
    instead of a cut position, and catalyse() raises an error instead of
    returning fragments.

    Unknown and NotDefined are also part of the base classes of these enzymes.

    Internal use only. Not meant to be instantiated.
    """

    # NOTE(review): the ``def`` lines of this class were missing from the
    # reviewed text; method names and signatures follow the Bio.Restriction
    # API -- confirm against upstream before merging.

    @classmethod
    def cut_once(cls):
        """Tell whether the enzyme cuts once on each strand.

        Always False here: no cut is recorded for these enzymes.
        """
        return False

    @classmethod
    def cut_twice(cls):
        """Tell whether the enzyme cuts twice on each strand.

        Always False here: no cut is recorded for these enzymes.
        """
        return False

    @classmethod
    def _modify(cls, location):
        """Yield the position to report for a site match (PRIVATE).

        For internal use only.

        location is an integer: the position in the sequence at which the
        enzyme pattern matched. Enzymes with a known cut shift this value to
        the real cutting position; here it is yielded unchanged.
        """
        yield location

    @classmethod
    def _rev_modify(cls, location):
        """Yield the position to report for a reverse-strand match (PRIVATE).

        For internal use only.

        Counterpart of _modify for sites located on the antiparallel strand
        when the enzyme is not palindromic. The location is yielded
        unchanged.
        """
        yield location

    @classmethod
    def characteristic(cls):
        """Return the characteristics of the enzyme as a tuple.

        The tuple is laid out as:
            - fst5 -> first 5' cut (current strand) or None
            - fst3 -> first 3' cut (complementary strand) or None
            - scd5 -> second 5' cut (current strand) or None
            - scd3 -> second 3' cut (complementary strand) or None
            - site -> recognition site.

        All four cut fields are None for this class.
        """
        no_cuts = (None,) * 4
        return no_cuts + (cls.site,)
702
703
class OneCut(AbstractCut):
    """Behaviour of enzymes that cut the DNA only once.

    Corresponds to ncuts values of 2 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    # NOTE(review): the ``def`` lines of this class were missing from the
    # reviewed text; method names and signatures follow the Bio.Restriction
    # API -- confirm against upstream before merging.

    @classmethod
    def cut_once(cls):
        """Tell whether the enzyme cuts once on each strand.

        Always True for this class.
        """
        return True

    @classmethod
    def cut_twice(cls):
        """Tell whether the enzyme cuts twice on each strand.

        Always False for this class.
        """
        return False

    @classmethod
    def _modify(cls, location):
        """Yield the real cutting position for a site match (PRIVATE).

        For internal use only.

        location is an integer: the position in the sequence at which the
        enzyme pattern matched. The value yielded is that position shifted
        by ``cls.fst5``.

        Example::

            EcoRI pattern : GAATTC
            EcoRI cuts after the G, so for a site found at position 10
            EcoRI._modify(10) -> 11.
        """
        cut_position = location + cls.fst5
        yield cut_position

    @classmethod
    def _rev_modify(cls, location):
        """Yield the real cutting position for a reverse match (PRIVATE).

        For internal use only.

        Counterpart of _modify for sites located on the antiparallel strand
        when the enzyme is not palindromic: the position is shifted back by
        ``cls.fst3``.
        """
        cut_position = location - cls.fst3
        yield cut_position

    @classmethod
    def characteristic(cls):
        """Return the characteristics of the enzyme as a tuple.

        The tuple is laid out as:
            - fst5 -> first 5' cut (current strand) or None
            - fst3 -> first 3' cut (complementary strand) or None
            - scd5 -> second 5' cut (current strand) or None
            - scd3 -> second 3' cut (complementary strand) or None
            - site -> recognition site.

        The two second-cut fields are None for this class.
        """
        first_cuts = (cls.fst5, cls.fst3)
        return first_cuts + (None, None, cls.site)
780
783 """Implement the methods for enzymes that cut the DNA twice.
784
785 Correspond to ncuts values of 4 in emboss_e.###
786
787 Internal use only. Not meant to be instantiated.
788 """
789
790 @classmethod
792 """Return if the cutting pattern has one cut.
793
794 True if the enzyme cut the sequence one time on each strand.
795 """
796 return False
797
798 @classmethod
800 """Return if the cutting pattern has two cuts.
801
802 True if the enzyme cut the sequence twice on each strand.
803 """
804 return True
805
806 @classmethod
808 """Return a generator that moves the cutting position by 1 (PRIVATE).
809
810 For internal use only.
811
812 location is an integer corresponding to the location of the match for
813 the enzyme pattern in the sequence.
814 _modify returns the real place where the enzyme will cut.
815
816 example::
817
818 EcoRI pattern : GAATTC
819 EcoRI will cut after the G.
820 so in the sequence:
821 ______
822 GAATACACGGAATTCGA
823 |
824 10
825 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
826 EcoRI cut after the G so:
827 EcoRI._modify(10) -> 11.
828
829 if the enzyme cuts twice, _modify will return two integers corresponding
830 to each cutting site.
831 """
832 yield location + cls.fst5
833 yield location + cls.scd5
834
835 @classmethod
837 """Return a generator that moves the cutting position by 1.
838
839 for internal use only.
840
841 as _modify for site situated on the antiparallel strand when the
842 enzyme is not palindromic
843 """
844 yield location - cls.fst3
845 yield location - cls.scd3
846
847 @classmethod
849 """Return a list of the enzyme's characteristics as tuple.
850
851 the tuple contains the attributes:
852 - fst5 -> first 5' cut (current strand) or None
853 - fst3 -> first 3' cut (complementary strand) or None
854 - scd5 -> second 5' cut (current strand) or None
855 - scd3 -> second 3' cut (complementary strand) or None
856 - site -> recognition site.
857
858 """
859 return cls.fst5, cls.fst3, cls.scd5, cls.scd3, cls.site
860
863 """Implement the information about methylation.
864
865 Enzymes of this class possess a site which is methylable.
866 """
867
868 @classmethod
870 """Return if recognition site can be methylated.
871
872 True if the recognition site is methylable.
873 """
874 return True
875
878 """Implement information about methylation sensitivity.
879
880 Enzymes of this class are not sensitive to methylation.
881 """
882
883 @classmethod
885 """Return if recognition site can be methylated.
886
887 True if the recognition site is methylable.
888 """
889 return False
890
893 """Implement methods for enzymes with palindromic recognition sites.
894
895 palindromic means : the recognition site and its reverse complement are
896 identical.
897 Remarks : an enzyme with a site CGNNCG is palindromic even if some
898 of the sites that it will recognise are not.
899 for example here : CGAACG
900
901 Internal use only. Not meant to be instantiated.
902 """
903
904 @classmethod
906 """Return a list of cutting sites of the enzyme in the sequence (PRIVATE).
907
908 For internal use only.
909
910 Implement the search method for palindromic enzymes.
911 """
912 siteloc = cls.dna.finditer(cls.compsite, cls.size)
913 cls.results = [r for s, g in siteloc for r in cls._modify(s)]
914 if cls.results:
915 cls._drop()
916 return cls.results
917
918 @classmethod
920 """Return if the enzyme has a palindromic recognition site."""
921 return True
922
925 """Implement methods for enzymes with non-palindromic recognition sites.
926
927 Palindromic means : the recognition site and its reverse complement are
928 identical.
929
930 Internal use only. Not meant to be instantiated.
931 """
932
933 @classmethod
935 """Return a list of cutting sites of the enzyme in the sequence (PRIVATE).
936
937 For internal use only.
938
939 Implement the search method for non palindromic enzymes.
940 """
941 compsite_for, compsite_rev = cls.compsite.pattern.split('|')
942 iterator_for = cls.dna.finditer(compsite_for, cls.size)
943 iterator_rev = cls.dna.finditer(compsite_rev, cls.size)
944 cls.results = []
945 modif = cls._modify
946 revmodif = cls._rev_modify
947 s = str(cls)
948 cls.on_minus = []
949
950 for start, group in iterator_for:
951 if group(s):
952 cls.results += [r for r in modif(start)]
953 s += '_as'
954 for start, group in iterator_rev:
955 if group(s):
956 cls.results += [r for r in revmodif(start)]
957
958 if cls.results:
959 cls.results.sort()
960 cls._drop()
961 return cls.results
962
963 @classmethod
965 """Return if the enzyme has a palindromic recognition site."""
966 return False
967
970 """Implement methods for enzymes that produce unknown overhangs.
971
972 These enzymes are also NotDefined and NoCut.
973
974 Internal use only. Not meant to be instantiated.
975 """
976
977 @classmethod
979 """List the sequence fragments after cutting dna with enzyme.
980
981 RE.catalyze(dna, linear=True) -> tuple of DNA.
982
983 Return a tuple of dna as will be produced by using RE to restrict the
984 dna.
985
986 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
987
988 If linear is False, the sequence is considered to be circular and the
989 output will be modified accordingly.
990 """
991 raise NotImplementedError('%s restriction is unknown.'
992 % cls.__name__)
993 catalyze = catalyse
994
995 @classmethod
997 """Return if the enzyme produces blunt ends.
998
999 True if the enzyme produces blunt end.
1000
1001 Related methods:
1002 - RE.is_3overhang()
1003 - RE.is_5overhang()
1004 - RE.is_unknown()
1005
1006 """
1007 return False
1008
1009 @classmethod
1011 """Return if the enzymes produces 5' overhanging ends.
1012
1013 True if the enzyme produces 5' overhang sticky end.
1014
1015 Related methods:
1016 - RE.is_3overhang()
1017 - RE.is_blunt()
1018 - RE.is_unknown()
1019
1020 """
1021 return False
1022
1023 @classmethod
1025 """Return if the enzyme produces 3' overhanging ends.
1026
1027 True if the enzyme produces 3' overhang sticky end.
1028
1029 Related methods:
1030 - RE.is_5overhang()
1031 - RE.is_blunt()
1032 - RE.is_unknown()
1033
1034 """
1035 return False
1036
1037 @classmethod
1039 """Return the type of the enzyme's overhang as string.
1040
1041 Can be "3' overhang", "5' overhang", "blunt", "unknown".
1042 """
1043 return 'unknown'
1044
1045 @classmethod
1047 """List all enzymes that produce compatible ends for the enzyme."""
1048 return []
1049
1050 @classmethod
1052 """Test if other enzyme produces compatible ends for enzyme (PRIVATE).
1053
1054 For internal use only.
1055
1056 Test for the compatibility of restriction ending of RE and other.
1057 """
1058 return False
1059
1060
class Blunt(AbstractCut):
    """Implement methods for enzymes that produce blunt ends.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated.
    """

    # NOTE(review): the ``def`` lines of this class were missing from the
    # reviewed text; method names and signatures follow the Bio.Restriction
    # API -- confirm against upstream before merging.

    @classmethod
    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        RE.catalyze(dna, linear=True) -> tuple of DNA.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna. If the enzyme does not cut, the tuple holds the whole sequence.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly: the piece after the last cut is
        joined to the piece before the first cut and comes first.
        """
        cuts = cls.search(dna, linear)
        seq = cls.dna  # set by search()
        # Slices start at 1: Bio.Restriction does not use zero based counting.
        if not cuts:
            return seq[1:],
        # Fragments lying between two consecutive cuts.
        between = [seq[cuts[i]:cuts[i + 1]] for i in range(len(cuts) - 1)]
        if seq.is_linear():
            fragments = [seq[1:cuts[0]]] + between + [seq[cuts[-1]:]]
        else:
            # Circular: the two end pieces form a single fragment.
            fragments = [seq[cuts[-1]:] + seq[1:cuts[0]]] + between
        return tuple(fragments)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends.

        Always True for this class.

        Related methods:
            - RE.is_3overhang()
            - RE.is_5overhang()
            - RE.is_unknown()

        """
        return True

    @classmethod
    def is_5overhang(cls):
        """Return if the enzyme produces 5' overhanging ends.

        Always False for this class.

        Related methods:
            - RE.is_3overhang()
            - RE.is_blunt()
            - RE.is_unknown()

        """
        return False

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends.

        Always False for this class.

        Related methods:
            - RE.is_5overhang()
            - RE.is_blunt()
            - RE.is_unknown()

        """
        return False

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        Here it is always "blunt".
        """
        return 'blunt'

    @classmethod
    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        Return a sorted list of the blunt cutters found in batch. If batch
        is not supplied (or is empty) AllEnzymes is searched instead.
        """
        if not batch:
            batch = AllEnzymes
        # Search the requested batch; the previous code always scanned
        # AllEnzymes and silently ignored the argument.
        return sorted(x for x in batch if x.is_blunt())

    @staticmethod
    def _mod1(other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only

        Test for the compatibility of restriction ending of RE and other:
        True if other is a blunt cutter as well.
        """
        return issubclass(other, Blunt)
1188
1189
class Ov5(AbstractCut):
    """Implement methods for enzymes that produce 5' overhanging ends.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated.
    """

    # NOTE(review): the ``def`` lines of this class were missing from the
    # reviewed text; method names and signatures follow the Bio.Restriction
    # API -- confirm against upstream before merging.

    @classmethod
    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        RE.catalyze(dna, linear=True) -> tuple of DNA.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna. If the enzyme does not cut, the tuple holds the whole sequence.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly: the piece after the last cut is
        joined to the piece before the first cut and comes first.
        """
        cuts = cls.search(dna, linear)
        seq = cls.dna  # set by search()
        # Slices start at 1: Bio.Restriction does not use zero based counting.
        if not cuts:
            return seq[1:],
        # Fragments lying between two consecutive cuts.
        between = [seq[cuts[i]:cuts[i + 1]] for i in range(len(cuts) - 1)]
        if seq.is_linear():
            fragments = [seq[1:cuts[0]]] + between + [seq[cuts[-1]:]]
        else:
            # Circular: the two end pieces form a single fragment.
            fragments = [seq[cuts[-1]:] + seq[1:cuts[0]]] + between
        return tuple(fragments)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends.

        Always False for this class.

        Related methods:
            - RE.is_3overhang()
            - RE.is_5overhang()
            - RE.is_unknown()

        """
        return False

    @classmethod
    def is_5overhang(cls):
        """Return if the enzyme produces 5' overhanging ends.

        Always True for this class.

        Related methods:
            - RE.is_3overhang()
            - RE.is_blunt()
            - RE.is_unknown()

        """
        return True

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends.

        Always False for this class.

        Related methods:
            - RE.is_5overhang()
            - RE.is_blunt()
            - RE.is_unknown()

        """
        return False

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        Here it is always "5' overhang".
        """
        return "5' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        Return a sorted list of the 5' overhang cutters found in batch whose
        ends are compatible with this enzyme (``x % cls``). If batch is not
        supplied (or is empty) AllEnzymes is searched instead.
        """
        if not batch:
            batch = AllEnzymes
        # Search the requested batch; the previous code always scanned
        # AllEnzymes and silently ignored the argument.
        return sorted(x for x in batch if x.is_5overhang() and x % cls)

    @classmethod
    def _mod1(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        Only another 5' overhang cutter can be compatible; the comparison
        itself is delegated to ``cls._mod2``.
        """
        if not issubclass(other, Ov5):
            return False
        return cls._mod2(other)
1320
1321
class Ov3(AbstractCut):
    """Implement methods for enzymes that produce 3' overhanging ends.

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated.
    """

    # NOTE(review): the ``def`` lines of this class were missing from the
    # reviewed text; method names and signatures follow the Bio.Restriction
    # API -- confirm against upstream before merging.

    @classmethod
    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        RE.catalyze(dna, linear=True) -> tuple of DNA.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna. If the enzyme does not cut, the tuple holds the whole sequence.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly: the piece after the last cut is
        joined to the piece before the first cut and comes first.
        """
        cuts = cls.search(dna, linear)
        seq = cls.dna  # set by search()
        # Slices start at 1: Bio.Restriction does not use zero based counting.
        if not cuts:
            return seq[1:],
        # Fragments lying between two consecutive cuts.
        between = [seq[cuts[i]:cuts[i + 1]] for i in range(len(cuts) - 1)]
        if seq.is_linear():
            fragments = [seq[1:cuts[0]]] + between + [seq[cuts[-1]:]]
        else:
            # Circular: the two end pieces form a single fragment.
            fragments = [seq[cuts[-1]:] + seq[1:cuts[0]]] + between
        return tuple(fragments)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends.

        Always False for this class.

        Related methods:
            - RE.is_3overhang()
            - RE.is_5overhang()
            - RE.is_unknown()

        """
        return False

    @classmethod
    def is_5overhang(cls):
        """Return if the enzyme produces 5' overhanging ends.

        Always False for this class.

        Related methods:
            - RE.is_3overhang()
            - RE.is_blunt()
            - RE.is_unknown()

        """
        return False

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends.

        Always True for this class.

        Related methods:
            - RE.is_5overhang()
            - RE.is_blunt()
            - RE.is_unknown()

        """
        return True

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        Here it is always "3' overhang".
        """
        return "3' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        Return a sorted list of the 3' overhang cutters found in batch whose
        ends are compatible with this enzyme (``x % cls``). If batch is not
        supplied (or is empty) AllEnzymes is searched instead.
        """
        if not batch:
            batch = AllEnzymes
        # Search the requested batch; the previous code always scanned
        # AllEnzymes and silently ignored the argument.
        return sorted(x for x in batch if x.is_3overhang() and x % cls)

    @classmethod
    def _mod1(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        Only another 3' overhang cutter can be compatible; the comparison
        itself is delegated to ``cls._mod2``.
        """
        if not issubclass(other, Ov3):
            return False
        return cls._mod2(other)
1455
1458 """Implement methods for enzymes with defined recognition site and cut.
1459
1460 Typical example : EcoRI -> G^AATT_C
1461 The overhang will always be AATT
1462 Notes:
1463 Blunt enzymes are always defined. Even if their site is GGATCCNNN^_N
1464 Their overhang is always the same : blunt!
1465
1466 Internal use only. Not meant to be instantiated.
1467 """
1468
1469 @classmethod
1471 """Remove cuts that are outsite of the sequence (PRIVATE).
1472
1473 For internal use only.
1474
1475 Drop the site that are situated outside the sequence in linear
1476 sequence. Modify the index for site in circular sequences.
1477 """
1478
1479
1480
1481
1482
1483
1484
1485
1486 length = len(cls.dna)
1487 drop = itertools.dropwhile
1488 take = itertools.takewhile
1489 if cls.dna.is_linear():
1490 cls.results = [x for x in drop(lambda x:x <= 1, cls.results)]
1491 cls.results = [x for x in take(lambda x:x <= length, cls.results)]
1492 else:
1493 for index, location in enumerate(cls.results):
1494 if location < 1:
1495 cls.results[index] += length
1496 else:
1497 break
1498 for index, location in enumerate(cls.results[::-1]):
1499 if location > length:
1500 cls.results[-(index + 1)] -= length
1501 else:
1502 break
1503 return
1504
1505 @classmethod
1507 """Return if recognition sequence and cut are defined.
1508
1509 True if the sequence recognised and cut is constant,
1510 i.e. the recognition site is not degenerated AND the enzyme cut inside
1511 the site.
1512
1513 Related methods:
1514 - RE.is_ambiguous()
1515 - RE.is_unknown()
1516
1517 """
1518 return True
1519
1520 @classmethod
1522 """Return if recognition sequence and cut may be ambiguous.
1523
1524 True if the sequence recognised and cut is ambiguous,
1525 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1526 the site.
1527
1528 Related methods:
1529 - RE.is_defined()
1530 - RE.is_unknown()
1531
1532 """
1533 return False
1534
1535 @classmethod
1537 """Return if recognition sequence is unknown.
1538
1539 True if the sequence is unknown,
1540 i.e. the recognition site has not been characterised yet.
1541
1542 Related methods:
1543 - RE.is_defined()
1544 - RE.is_ambiguous()
1545
1546 """
1547 return False
1548
1549 @classmethod
1551 """Return a string representing the recognition site and cuttings.
1552
1553 Return a representation of the site with the cut on the (+) strand
1554 represented as '^' and the cut on the (-) strand as '_'.
1555 ie:
1556
1557 >>> EcoRI.elucidate() # 5' overhang
1558 'G^AATT_C'
1559 >>> KpnI.elucidate() # 3' overhang
1560 'G_GTAC^C'
1561 >>> EcoRV.elucidate() # blunt
1562 'GAT^_ATC'
1563 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1564 '? GTATAC ?'
1565 >>>
1566
1567 """
1568 f5 = cls.fst5
1569 f3 = cls.fst3
1570 site = cls.site
1571 if cls.cut_twice():
1572 re = 'cut twice, not yet implemented sorry.'
1573 elif cls.is_5overhang():
1574 if f5 == f3 == 0:
1575 re = 'N^' + cls.site + '_N'
1576 elif f3 == 0:
1577 re = site[:f5] + '^' + site[f5:] + '_N'
1578 else:
1579 re = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
1580 elif cls.is_blunt():
1581 re = site[:f5] + '^_' + site[f5:]
1582 else:
1583 if f5 == f3 == 0:
1584 re = 'N_' + site + '^N'
1585 else:
1586 re = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
1587 return re
1588
1589 @classmethod
1591 """Test if other enzyme produces compatible ends for enzyme (PRIVATE).
1592
1593 For internal use only.
1594
1595 Test for the compatibility of restriction ending of RE and other.
1596 """
1597
1598
1599
1600 if other.ovhgseq == cls.ovhgseq:
1601 return True
1602 elif issubclass(other, Ambiguous):
1603 return other._mod2(cls)
1604 else:
1605 return False
1606
1609 """Implement methods for enzymes that produce variable overhangs.
1610
1611 Typical example : BstXI -> CCAN_NNNN^NTGG
1612 The overhang can be any sequence of 4 bases.
1613
1614 Notes:
1615 Blunt enzymes are always defined. Even if their site is GGATCCNNN^_N
1616 Their overhang is always the same : blunt!
1617
1618 Internal use only. Not meant to be instantiated.
1619
1620 """
1621
1622 @classmethod
1649
1650 @classmethod
1652 """Return if recognition sequence and cut are defined.
1653
1654 True if the sequence recognised and cut is constant,
1655 i.e. the recognition site is not degenerated AND the enzyme cut inside
1656 the site.
1657
1658 Related methods:
1659 - RE.is_ambiguous()
1660 - RE.is_unknown()
1661
1662 """
1663 return False
1664
1665 @classmethod
1667 """Return if recognition sequence and cut may be ambiguous.
1668
1669 True if the sequence recognised and cut is ambiguous,
1670 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1671 the site.
1672
1673 Related methods:
1674 - RE.is_defined()
1675 - RE.is_unknown()
1676
1677 """
1678 return True
1679
1680 @classmethod
1682 """Return if recognition sequence is unknown.
1683
1684 True if the sequence is unknown,
1685 i.e. the recognition site has not been characterised yet.
1686
1687 Related methods:
1688 - RE.is_defined()
1689 - RE.is_ambiguous()
1690
1691 """
1692 return False
1693
1694 @classmethod
1696 """Test if other enzyme produces compatible ends for enzyme (PRIVATE).
1697
1698 For internal use only.
1699
1700 Test for the compatibility of restriction ending of RE and other.
1701 """
1702
1703
1704
1705 if len(cls.ovhgseq) != len(other.ovhgseq):
1706 return False
1707 else:
1708 se = cls.ovhgseq
1709 for base in se:
1710 if base in 'ATCG':
1711 pass
1712 if base in 'N':
1713 se = '.'.join(se.split('N'))
1714 if base in 'RYWMSKHDBV':
1715 expand = '[' + matching[base] + ']'
1716 se = expand.join(se.split(base))
1717 if re.match(se, other.ovhgseq):
1718 return True
1719 else:
1720 return False
1721
@classmethod
def elucidate(cls):
    """Return a string representing the recognition site and cuttings.

    Return a representation of the site with the cut on the (+) strand
    represented as '^' and the cut on the (-) strand as '_'.
    ie:

    >>> EcoRI.elucidate()   # 5' overhang
    'G^AATT_C'
    >>> KpnI.elucidate()    # 3' overhang
    'G_GTAC^C'
    >>> EcoRV.elucidate()   # blunt
    'GAT^_ATC'
    >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
    '? GTATAC ?'
    >>>

    Cuts that fall outside the recognition site are padded with 'N'.
    Raise a ValueError for a blunt cutter whose cut position lies inside
    the site (0 <= fst5 <= len(site)).
    """
    f5 = cls.fst5
    f3 = cls.fst3
    length = len(cls)
    site = cls.site
    # The result is kept in 'rep' rather than 're' so the local name does
    # not hide the regular expression module used elsewhere in the file.
    if cls.cut_twice():
        rep = 'cut twice, not yet implemented sorry.'
    elif cls.is_5overhang():
        if f3 == f5 == 0:
            rep = 'N^' + site + '_N'
        elif 0 <= f5 <= length and 0 <= f3 + length <= length:
            rep = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
        elif 0 <= f5 <= length:
            rep = site[:f5] + '^' + site[f5:] + f3 * 'N' + '_N'
        elif 0 <= f3 + length <= length:
            rep = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
        elif f3 + length < 0:
            # Both cuts lie before the site.  The previous expression,
            # 'N^' * abs(f5) * 'N', multiplied a str by a str and raised
            # a TypeError.
            rep = 'N^' + abs(f5) * 'N' + '_' + abs(length + f3) * 'N' + site
        elif f5 > length:
            rep = (site + (f5 - length) * 'N' + '^' +
                   (length + f3 - f5) * 'N' + '_N')
        else:
            rep = 'N^' + abs(f5) * 'N' + site + f3 * 'N' + '_N'
    elif cls.is_blunt():
        if f5 < 0:
            rep = 'N^_' + abs(f5) * 'N' + site
        elif f5 > length:
            rep = site + (f5 - length) * 'N' + '^_N'
        else:
            raise ValueError('%s.easyrepr() : error f5=%i'
                             % (cls.name, f5))
    else:
        # Remaining case: neither 5' overhang nor blunt.
        if f3 == 0:
            if f5 == 0:
                rep = 'N_' + site + '^N'
            else:
                rep = site + '_' + (f5 - length) * 'N' + '^N'
        elif 0 < f3 + length <= length and 0 <= f5 <= length:
            rep = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
        elif 0 < f3 + length <= length:
            rep = site[:f3] + '_' + site[f3:] + (f5 - length) * 'N' + '^N'
        elif 0 <= f5 <= length:
            rep = 'N_' + 'N' * (f3 + length) + site[:f5] + '^' + site[f5:]
        elif f3 > 0:
            rep = site + f3 * 'N' + '_' + (f5 - f3 - length) * 'N' + '^N'
        elif f5 < 0:
            rep = ('N_' + abs(f3 - f5 + length) * 'N' + '^' +
                   abs(f5) * 'N' + site)
        else:
            rep = ('N_' + abs(f3 + length) * 'N' + site +
                   (f5 - length) * 'N' + '^N')
    return rep
1792
1795 """Implement methods for enzymes with non-characterized overhangs.
1796
1797 Correspond to NoCut and Unknown.
1798
1799 Internal use only. Not meant to be instantiated.
1800 """
1801
1802 @classmethod
1804 """Remove cuts that are outsite of the sequence (PRIVATE).
1805
1806 For internal use only.
1807
1808 Drop the site that are situated outside the sequence in linear
1809 sequence. Modify the index for site in circular sequences.
1810 """
1811 if cls.dna.is_linear():
1812 return
1813 else:
1814 length = len(cls.dna)
1815 for index, location in enumerate(cls.results):
1816 if location < 1:
1817 cls.results[index] += length
1818 else:
1819 break
1820 for index, location in enumerate(cls.results[:-1]):
1821 if location > length:
1822 cls.results[-(index + 1)] -= length
1823 else:
1824 break
1825 return
1826
1827 @classmethod
1829 """Return if recognition sequence and cut are defined.
1830
1831 True if the sequence recognised and cut is constant,
1832 i.e. the recognition site is not degenerated AND the enzyme cut inside
1833 the site.
1834
1835 Related methods:
1836 - RE.is_ambiguous()
1837 - RE.is_unknown()
1838
1839 """
1840 return False
1841
1842 @classmethod
1844 """Return if recognition sequence and cut may be ambiguous.
1845
1846 True if the sequence recognised and cut is ambiguous,
1847 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1848 the site.
1849
1850 Related methods:
1851 - RE.is_defined()
1852 - RE.is_unknown()
1853
1854 """
1855 return False
1856
1857 @classmethod
1859 """Return if recognition sequence is unknown.
1860
1861 True if the sequence is unknown,
1862 i.e. the recognition site has not been characterised yet.
1863
1864 Related methods:
1865 - RE.is_defined()
1866 - RE.is_ambiguous()
1867
1868 """
1869 return True
1870
1871 @classmethod
1873 """Test if other enzyme produces compatible ends for enzyme (PRIVATE).
1874
1875 For internal use only.
1876
1877 Test for the compatibility of restriction ending of RE and other.
1878 """
1879
1880
1881
1882
1883
1884
1885
1886 raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!"
1887 % (str(cls), str(other), str(cls)))
1888
1889 @classmethod
1891 """Return a string representing the recognition site and cuttings.
1892
1893 Return a representation of the site with the cut on the (+) strand
1894 represented as '^' and the cut on the (-) strand as '_'.
1895 ie:
1896
1897 >>> EcoRI.elucidate() # 5' overhang
1898 'G^AATT_C'
1899 >>> KpnI.elucidate() # 3' overhang
1900 'G_GTAC^C'
1901 >>> EcoRV.elucidate() # blunt
1902 'GAT^_ATC'
1903 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1904 '? GTATAC ?'
1905 >>>
1906
1907 """
1908 return '? %s ?' % cls.site
1909
1912 """Implement methods for enzymes which are commercially available.
1913
1914 Internal use only. Not meant to be instantiated.
1915 """
1916
1917
1918
1919
1920
1921
1922 @classmethod
1924 """Print a list of suppliers of the enzyme."""
1925 for s in cls.suppl:
1926 print(suppliers_dict[s][0] + ',')
1927 return
1928
1929 @classmethod
1931 """Return a list of suppliers of the enzyme."""
1932 return [v[0] for k, v in suppliers_dict.items() if k in cls.suppl]
1933
1934 @classmethod
1936 """Return the recommended buffer of the supplier for this enzyme.
1937
1938 Not implemented yet.
1939 """
1940 return
1941
1942 @classmethod
1944 """Return if enzyme is commercially available.
1945
1946 True if RE has suppliers.
1947 """
1948 return True
1949
1952 """Implement methods for enzymes which are not commercially available.
1953
1954 Internal use only. Not meant to be instantiated.
1955 """
1956
1957 @staticmethod
1959 """Print a list of suppliers of the enzyme."""
1960 return None
1961
1962 @classmethod
1964 """Return a list of suppliers of the enzyme."""
1965 return []
1966
1967 @classmethod
1969 """Return the recommended buffer of the supplier for this enzyme.
1970
1971 Not implemented yet.
1972 """
1973 raise TypeError("Enzyme not commercially available.")
1974
1975 @classmethod
1977 """Return if enzyme is commercially available.
1978
1979 True if RE has suppliers.
1980 """
1981 return False
1982
1992 """Class for operations on more than one enzyme."""
1993
1994 - def __init__(self, first=(), suppliers=()):
2002
2004 if len(self) < 5:
2005 return '+'.join(self.elements())
2006 else:
2007 return '...'.join(('+'.join(self.elements()[:2]),
2008 '+'.join(self.elements()[-2:])))
2009
2011 return 'RestrictionBatch(%s)' % self.elements()
2012
2019
2021 """Override '/' operator to use as search method."""
2022 return self.search(other)
2023
2025 """Override division with reversed operands to use as search method."""
2026 return self.search(other)
2027
2029 """Override Python 3 division operator to use as search method.
2030
2031 Like __div__.
2032 """
2033 return self.search(other)
2034
2036 """As __truediv___, with reversed operands.
2037
2038 Like __rdiv__.
2039 """
2040 return self.search(other)
2041
def get(self, enzyme, add=False):
    """Return the enzyme if it belongs to the batch.

    enzyme is first normalised with self.format().  When the normalised
    enzyme is already in the batch it is returned as is.  When it is not,
    it is added to the batch and returned if add is true; otherwise a
    ValueError is raised.
    """
    candidate = self.format(enzyme)
    if candidate not in self:
        if not add:
            raise ValueError('enzyme %s is not in RestrictionBatch'
                             % candidate.__name__)
        self.add(candidate)
    return candidate
2059
def lambdasplit(self, func):
    """Filter enzymes in batch with supplied function.

    Return a new RestrictionBatch containing only the enzymes of this
    batch for which func returns a true value.
    """
    new = RestrictionBatch()
    # The batch is a set subclass (see add/remove), so its members must be
    # added through the set API.  The previous code assigned a '_data'
    # attribute, which a built-in set ignores, and therefore always
    # returned an empty batch.
    for enzyme in filter(func, self):
        new.add_nocheck(enzyme)
    return new
2070
2072 """Add all enzymes from a given supplier to batch.
2073
2074 letter represents the suppliers as defined in the dictionary
2075 RestrictionDictionary.suppliers
2076 Returns None.
2077 Raise a KeyError if letter is not a supplier code.
2078 """
2079 supplier = suppliers_dict[letter]
2080 self.suppliers.append(letter)
2081 for x in supplier[1]:
2082 self.add_nocheck(eval(x))
2083 return
2084
2086 """List the current suppliers for the restriction batch.
2087
2088 Return a sorted list of the suppliers which have been used to
2089 create the batch.
2090 """
2091 suppl_list = sorted(suppliers_dict[x][0] for x in self.suppliers)
2092 return suppl_list
2093
2095 """Override '+=' for use with sets.
2096
2097 b += other -> add other to b, check the type of other.
2098 """
2099 self.add(other)
2100 return self
2101
2103 """Overide '+' for use with sets.
2104
2105 b + other -> new RestrictionBatch.
2106 """
2107 new = self.__class__(self)
2108 new.add(other)
2109 return new
2110
2112 """Remove enzyme from restriction batch.
2113
2114 Safe set.remove method. Verify that other is a RestrictionType or can
2115 be evaluated to a RestrictionType.
2116 Raise a ValueError if other can not be evaluated to a RestrictionType.
2117 Raise a KeyError if other is not in B.
2118 """
2119 return set.remove(self, self.format(other))
2120
def add(self, other):
    """Add a restriction enzyme to the restriction batch.

    Type-checked counterpart of set.add: other is passed through
    self.format() first, and whatever format() returns is what gets
    stored.  Any exception raised by format() propagates to the caller.
    """
    enzyme = self.format(other)
    return set.add(self, enzyme)
2129
2131 """Add restriction enzyme to batch without checking its type."""
2132 return set.add(self, other)
2133
2151
2153 """Return if enzyme (name) is a known enzyme.
2154
2155 True if y or eval(y) is a RestrictionType.
2156 """
2157 return (isinstance(y, RestrictionType) or
2158 isinstance(eval(str(y)), RestrictionType))
2159
def split(self, *classes, **bool):
    """Extract enzymes of a certain class and put in new RestrictionBatch.

    B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

    Each positional argument is a class to test the enzymes against.  A
    keyword argument named after one of those classes selects whether
    the enzymes must (true value, the default) or must not (false value)
    be subclasses of it.  An enzyme is kept only if it satisfies every
    condition.

    It works but it is slow, so it has really an interest when splitting
    over multiple conditions.
    """
    # NOTE: the '**bool' name hides the builtin inside this method; it is
    # kept unchanged so the signature stays as it was.
    def splittest(element):
        for klass in classes:
            wanted = bool.get(klass.__name__, True)
            is_member = issubclass(element, klass)
            if (is_member and not wanted) or (wanted and not is_member):
                return False
        return True

    new = RestrictionBatch()
    # The batch is a set subclass (see add/remove): populate it through
    # the set API.  The previous code assigned a '_data' attribute, which
    # a built-in set ignores, so the returned batch was always empty.
    for enzyme in filter(splittest, self):
        new.add_nocheck(enzyme)
    return new
2185
2187 """List the enzymes of the RestrictionBatch as list of strings.
2188
2189 Give all the names of the enzymes in B sorted alphabetically.
2190 """
2191 l = sorted(str(e) for e in self)
2192 return l
2193
2195 """List the names of the enzymes of the RestrictionBatch.
2196
2197 Return a list of the name of the elements of the batch.
2198 """
2199 return [str(e) for e in self]
2200
2201 @classmethod
2203 """Return a dicionary with supplier codes.
2204
2205 Letter code for the suppliers.
2206 """
2207 supply = dict((k, v[0]) for k, v in suppliers_dict.items())
2208 return supply
2209
2210 @classmethod
2212 """Print a list of supplier codes."""
2213 supply = [' = '.join(i) for i in cls.suppl_codes().items()]
2214 print('\n'.join(supply))
2215 return
2216
def search(self, dna, linear=True):
    """Return a dict of cutting sites in the sequence for the batch enzymes.

    dna is either a DNA instance, which is wrapped in a
    FormattedSeq(dna, linear) before searching, or a FormattedSeq, whose
    own 'linear' attribute is used instead of the linear argument.

    The result maps each enzyme of the batch to the value returned by
    its own search().  It is stored in self.mapping, and the pair
    (str(dna), linear flag) is remembered in self.already_mapped so that
    a repeated call with the same pair returns the stored mapping
    without searching again.

    Raise a TypeError for any other type of dna.
    """
    if not hasattr(self, "already_mapped"):
        # Make sure the attribute exists before it is compared below.
        self.already_mapped = None
    if isinstance(dna, DNA):
        cache_key = (str(dna), linear)
        needs_wrapping = True
    elif isinstance(dna, FormattedSeq):
        cache_key = (str(dna), dna.linear)
        needs_wrapping = False
    else:
        raise TypeError("Expected Seq or MutableSeq instance, got %s instead"
                        % type(dna))
    if cache_key != self.already_mapped:
        self.already_mapped = cache_key
        target = FormattedSeq(dna, linear) if needs_wrapping else dna
        self.mapping = dict((enzyme, enzyme.search(target))
                            for enzyme in self)
    return self.mapping
2248
2249
2250
2251
2252
2253
2254
2255
2256 -class Analysis(RestrictionBatch, PrintFormat):
2257 """Provide methods for enhanced analysis and pretty printing."""
2258
2261 """Initialize an Analysis with RestrictionBatch and sequence.
2262
2263 Analysis([restrictionbatch [, sequence] linear=True])
2264 -> New Analysis class.
2265
2266 For most of the methods of this class if a dictionary is given it will
2267 be used as the base to calculate the results.
2268 If no dictionary is given a new analysis using the RestrictionBatch
2269 which has been given when the Analysis class has been instantiated,
2270 will be carried out and used.
2271 """
2272 RestrictionBatch.__init__(self, restrictionbatch)
2273 self.rb = restrictionbatch
2274 self.sequence = sequence
2275 self.linear = linear
2276 if self.sequence:
2277 self.search(self.sequence, self.linear)
2278
2280 return 'Analysis(%s,%s,%s)' %\
2281 (repr(self.rb), repr(self.sequence), self.linear)
2282
2284 """Filter result for keys which are in wanted.
2285
2286 A._sub_set(other_set) -> dict.
2287
2288 Internal use only.
2289
2290 Screen the results through wanted set.
2291 Keep only the results for which the enzymes is in wanted set.
2292 """
2293
2294 return dict((k, v) for k, v in self.mapping.items() if k in wanted)
2295
2297 """Set boundaries to correct values.
2298
2299 Format the boundaries for use with the methods that limit the
2300 search to only part of the sequence given to analyse.
2301 """
2302 if not isinstance(start, int):
2303 raise TypeError('expected int, got %s instead' % type(start))
2304 if not isinstance(end, int):
2305 raise TypeError('expected int, got %s instead' % type(end))
2306 if start < 1:
2307 start += len(self.sequence)
2308 if end < 1:
2309 end += len(self.sequence)
2310 if start < end:
2311 pass
2312 else:
2313 start, end = end, start
2314 if start < end:
2315 return start, end, self._test_normal
2316
2318 """Test if site is between start and end.
2319
2320 Internal use only
2321 """
2322 return start <= site < end
2323
2325 """Test if site is between end and start (for circular sequences).
2326
2327 Internal use only.
2328 """
2329 return start <= site <= len(self.sequence) or 1 <= site < end
2330
2341
def print_that(self, dct=None, title='', s1=''):
    """Print the output of the analysis.

    A.print_that([dct[, title[, s1]]]) -> print the results from dct.

    The three arguments are handed unchanged to A.format_output() and
    the string it returns is printed.  This method is here for backwards
    compatibility.
    """
    report = self.format_output(dct, title, s1)
    print(report)
2354
def change(self, **what):
    """Change parameters of print output.

    `A.change(**attribute_name)` -> Change attribute of Analysis.

    Accepted keywords:

    - NameWidth, ConsoleWidth: set the attribute, then recompute
      self.Cmodulo and self.PrefWidth from the two widths.
    - sequence, linear: set the attribute and run the search again.
    - rb: re-initialise the analysis with the new restriction batch,
      keeping the current sequence and linear setting.
    - Indent, Maxsize: set the attribute.

    Cmodulo and PrefWidth can not be set directly and any other keyword
    is unknown; both cases raise an AttributeError.

    Changing one of these parameters here might not give the results
    you expect. In which case, you can settle back to a 80 columns shell
    or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
    you get it right.
    """
    for k, v in what.items():
        if k in ('NameWidth', 'ConsoleWidth'):
            setattr(self, k, v)
            self.Cmodulo = self.ConsoleWidth % self.NameWidth
            self.PrefWidth = self.ConsoleWidth - self.Cmodulo
        elif k == 'sequence':
            setattr(self, 'sequence', v)
            self.search(self.sequence, self.linear)
        elif k == 'rb':
            # __init__ returns None: its result must not be bound to
            # 'self', otherwise every keyword processed after 'rb' in the
            # same call would operate on None.
            Analysis.__init__(self, v, self.sequence, self.linear)
        elif k == 'linear':
            setattr(self, 'linear', v)
            self.search(self.sequence, v)
        elif k in ('Indent', 'Maxsize'):
            setattr(self, k, v)
        elif k in ('Cmodulo', 'PrefWidth'):
            raise AttributeError(
                'To change %s, change NameWidth and/or ConsoleWidth' % k)
        else:
            raise AttributeError('Analysis has no attribute %s' % k)
    return
2390
def full(self, linear=True):
    """Return the results for all the enzymes of the batch.

    A.full() -> dict.

    This is the stored mapping (self.mapping) itself, not a copy.  The
    linear argument is accepted but not used by this method.
    """
    return self.mapping
2399
def blunt(self, dct=None):
    """Return only the results of enzymes that produce blunt ends.

    dct is the result dictionary to filter; when it is omitted or empty
    the stored mapping (self.mapping) is filtered instead.
    """
    source = dct if dct else self.mapping
    return {enzyme: cuts for enzyme, cuts in source.items()
            if enzyme.is_blunt()}
2405
2407 """Return only cuts that have 5' overhangs."""
2408 if not dct:
2409 dct = self.mapping
2410 return dict((k, v) for k, v in dct.items() if k.is_5overhang())
2411
2413 """Return only cuts that have 3' overhangs."""
2414 if not dct:
2415 dct = self.mapping
2416 return dict((k, v) for k, v in dct.items() if k.is_3overhang())
2417
2419 """Return only results from enzymes that produce defined overhangs."""
2420 if not dct:
2421 dct = self.mapping
2422 return dict((k, v) for k, v in dct.items() if k.is_defined())
2423
2425 """Return only results from enzyme with at least one cut."""
2426 if not dct:
2427 dct = self.mapping
2428 return dict((k, v) for k, v in dct.items() if v)
2429
2431 """Return only results from enzymes that don't cut the sequence."""
2432 if not dct:
2433 dct = self.mapping
2434 return dict((k, v) for k, v in dct.items() if not v)
2435
2437 """Return only results from enzymes that cut the sequence N times."""
2438 if not dct:
2439 dct = self.mapping
2440 return dict((k, v) for k, v in dct.items()if len(v) == N)
2441
2443 """Return only results from enzymes that cut (x,y,z,...) times."""
2444 if not dct:
2445 dct = self.mapping
2446 return dict((k, v) for k, v in dct.items() if len(v) in list)
2447
def with_name(self, names, dct=None):
    """Return only results from enzymes which names are listed.

    Every entry of names that is not found in AllEnzymes triggers a
    BiopythonWarning and is left out.  When dct is omitted or empty a
    new search is run on self.sequence with the remaining names;
    otherwise the entries of dct whose key is one of the remaining names
    are returned.
    """
    # Build a separate list instead of deleting from 'names' while
    # iterating over it: that skipped the entry following each removed
    # one and modified the list owned by the caller.
    known = []
    for enzyme in names:
        if enzyme in AllEnzymes:
            known.append(enzyme)
        else:
            warnings.warn("no data for the enzyme: %s" % enzyme,
                          BiopythonWarning)
    if not dct:
        return RestrictionBatch(known).search(self.sequence, self.linear)
    return dict((n, dct[n]) for n in known if n in dct)
2458
def with_site_size(self, site_size, dct=None):
    """Return only results from enzymes with a given site size.

    site_size is compared with the 'size' attribute of the enzymes.
    When dct is omitted or empty a new search is run on self.sequence
    with the enzymes of this batch that have that size; otherwise the
    entries of dct whose enzyme has that size are returned.
    """
    if not dct:
        sites = [name for name in self if name.size == site_size]
        # Pass the topology along, as with_name() does; it used to be
        # dropped, so the sequence was always searched as linear.
        return RestrictionBatch(sites).search(self.sequence, self.linear)
    # The previous test, 'k in site_size', looked for the enzyme inside
    # an integer and raised a TypeError.
    return dict((k, v) for k, v in dct.items() if k.size == site_size)
2465
2467 """Return only results from enzymes that only cut within start, end."""
2468 start, end, test = self._boundaries(start, end)
2469 if not dct:
2470 dct = self.mapping
2471 d = dict(dct)
2472 for key, sites in dct.items():
2473 if not sites:
2474 del d[key]
2475 continue
2476 for site in sites:
2477 if test(start, end, site):
2478 continue
2479 else:
2480 del d[key]
2481 break
2482 return d
2483
def between(self, start, end, dct=None):
    """Return only results from enzymes that cut at least within borders.

    The borders are normalised with self._boundaries().  An enzyme is
    kept, with its complete list of sites, as soon as one of its sites
    passes the test returned by _boundaries(); it may cut outside the
    borders as well.  When dct is omitted or empty the stored mapping is
    used.
    """
    start, end, test = self._boundaries(start, end)
    source = dct if dct else self.mapping
    return {enzyme: sites for enzyme, sites in source.items()
            if any(test(start, end, site) for site in sites)}
2501
2503 """Return only results from within start, end.
2504
2505 Enzymes must cut inside start/end and may also cut outside. However,
2506 only the cutting positions within start/end will be returned.
2507 """
2508 d = []
2509 if start <= end:
2510 d = [(k, [vv for vv in v if start <= vv <= end])
2511 for k, v in self.between(start, end, dct).items()]
2512 else:
2513 d = [(k, [vv for vv in v if start <= vv or vv <= end])
2514 for k, v in self.between(start, end, dct).items()]
2515 return dict(d)
2516
2518 """Return only results from enzymes that only cut outside start, end.
2519
2520 Enzymes that cut the sequence outside of the region
2521 in between start and end but do not cut inside.
2522 """
2523 start, end, test = self._boundaries(start, end)
2524 if not dct:
2525 dct = self.mapping
2526 d = dict(dct)
2527 for key, sites in dct.items():
2528 if not sites:
2529 del d[key]
2530 continue
2531 for site in sites:
2532 if test(start, end, site):
2533 del d[key]
2534 break
2535 else:
2536 continue
2537 return d
2538
def outside(self, start, end, dct=None):
    """Return only results from enzymes that at least cut outside borders.

    The borders are normalised with self._boundaries().  An enzyme is
    kept, with its complete list of sites, as soon as one of its sites
    fails the test returned by _boundaries(); it may cut inside the
    borders as well.  Enzymes without any site are never returned.  When
    dct is omitted or empty the stored mapping is used.
    """
    start, end, test = self._boundaries(start, end)
    source = dct if dct else self.mapping
    return {enzyme: sites for enzyme, sites in source.items()
            if not all(test(start, end, site) for site in sites)}
2557
def do_not_cut(self, start, end, dct=None):
    """Return only results from enzymes that don't cut between borders.

    The result combines the enzymes of dct that have no site at all
    (self.without_site) with those that only cut outside start/end
    (self.only_outside).  When dct is omitted or empty the stored
    mapping is used.
    """
    if not dct:
        dct = self.mapping
    # Filter the same dictionary in both steps.  without_site() used to
    # be called without argument, so it always screened the stored
    # mapping and could return enzymes absent from the supplied dct.
    d = self.without_site(dct)
    d.update(self.only_outside(start, end, dct))
    return d
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
# Build one object per enzyme and sort them into two batches according to
# the value returned by their is_comm() method.
CommOnly = RestrictionBatch()
NonComm = RestrictionBatch()
for TYPE, (bases, enzymes) in typedict.items():
    # 'bases' holds names as strings; eval() resolves each of them to the
    # object of that name visible in this module.
    # NOTE(review): eval() is applied to the content of typedict; this
    # assumes typedict is trusted data shipped with the package - confirm.
    bases = tuple(eval(x) for x in bases)
    # Create a class named 'RestrictionType' from those bases, with
    # RestrictionType passed as the metaclass argument of type.__new__.
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes:
        # T is called with (name, bases, dict), i.e. like a metaclass.
        # NOTE(review): presumably 'bases' includes RestrictionType, so
        # that each call creates the enzyme class named k - confirm
        # against typedict.
        newenz = T(k, bases, enzymedict[k])
        if newenz.is_comm():
            CommOnly.add_nocheck(newenz)
        else:
            NonComm.add_nocheck(newenz)

# AllEnzymes is the union of the two batches.
AllEnzymes = RestrictionBatch(CommOnly)
AllEnzymes.update(NonComm)

names = [str(x) for x in AllEnzymes]
try:
    # On Python 2 the list comprehension above leaks its loop variable;
    # on Python 3 it does not, hence the NameError guard.
    del x
except NameError:
    pass
# At module level locals() is the module namespace, so this publishes every
# enzyme as a module attribute under its own name.
locals().update(dict(zip(names, AllEnzymes)))
__all__ = ('FormattedSeq', 'Analysis', 'RestrictionBatch', 'AllEnzymes',
           'CommOnly', 'NonComm') + tuple(names)
# Remove the loop variables and temporaries from the module namespace.
del k, enzymes, TYPE, bases, names
2649